setwd("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R")

Customer_Acquisition=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/Customer Acqusition.csv")

Repayment=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/Repayment.csv")

Spend=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/spend.csv")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(vtree)
library(ggplot2)
library(ggrepel)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(scales)
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
merged=inner_join(inner_join(Spend,Repayment,by=c("Sl.No."="SL.No.")),Customer_Acquisition,by=c("Customer.x"="Customer"))
View(merged)

Ques1 1.a

merged$Age[merged$Age<18]=mean(Customer_Acquisition$Age)

1.b

merged$Amount.x[merged$Amount.x>merged$Limit]=(1/2)*merged$Limit[merged$Amount.x>merged$Limit]

1.c

merged$Amount.y[merged$Amount.y>merged$Limit]=merged$Limit[merged$Amount.y>merged$Limit]

Ques2

2.a

print(paste("Number of Distinct Customers are",n_distinct(Customer_Acquisition$Customer)))
## [1] "Number of Distinct Customers are 100"

2.b

distinct(data.frame(Customer_Acquisition$Segment))
##   Customer_Acquisition.Segment
## 1                Self Employed
## 2                 Salaried_MNC
## 3                 Salaried_Pvt
## 4                         Govt
## 5                Normal Salary
print(paste("There are",n_distinct(Customer_Acquisition$Segment),"Distinct Categories"))
## [1] "There are 5 Distinct Categories"

2.c Since we have changed some spend values on the basis of limit for each customer The monthly average spend by customers is calculated using merged dataset only

options(dplyr.summarise.inform=FALSE)
merged%>%group_by(Month=months(dmy(Month.x)))%>%summarise(Average_Spend=round(mean(Amount.x),2))
## # A tibble: 12 x 2
##    Month     Average_Spend
##    <chr>             <dbl>
##  1 April           135036.
##  2 August          165760.
##  3 December        117273.
##  4 February        151262.
##  5 January         147770.
##  6 July            166327.
##  7 June            182038.
##  8 March           131841.
##  9 May             151624.
## 10 November        150428.
## 11 October         131018.
## 12 September       141132.

2.d

options(dplyr.summarise.inform=FALSE)
merged%>%group_by(Month=months(dmy(Month.y)))%>%summarise(Average_Repayment=round(mean(Amount.y),2))
## # A tibble: 12 x 2
##    Month     Average_Repayment
##    <chr>                 <dbl>
##  1 April               167677.
##  2 August              152817.
##  3 December            190139 
##  4 February            158873.
##  5 January             163446.
##  6 July                165204.
##  7 June                114147.
##  8 March               156431.
##  9 May                 186098.
## 10 November            152692.
## 11 October             170119.
## 12 September           129346.

2.e

merged%>%group_by(months(dmy(Month.x)))%>%summarise(Profit_for_bank=(((sum(Amount.y)-sum(Amount.x))*2.9)/100))
## # A tibble: 12 x 2
##    `months(dmy(Month.x))` Profit_for_bank
##    <chr>                            <dbl>
##  1 April                           68084.
##  2 August                          13207.
##  3 December                        35309.
##  4 February                       223125.
##  5 January                        117541.
##  6 July                           -32640.
##  7 June                           -10368.
##  8 March                          152192.
##  9 May                             76720.
## 10 November                        15119.
## 11 October                         44343.
## 12 September                       28205.

2.f

merged%>%group_by(Type)%>%summarise(Frequency=length(Type))%>%arrange(desc(Frequency))%>%head(5)
## # A tibble: 5 x 2
##   Type         Frequency
##   <chr>            <int>
## 1 PETRO              200
## 2 CAMERA             160
## 3 FOOD               160
## 4 AIR TICKET         147
## 5 TRAIN TICKET       132
merged%>%group_by(Type)%>%summarise(Frequency=length(Type))%>%arrange(desc(Frequency))%>%head(5)%>%ggplot(aes(x=Type,y=Frequency))+geom_bar(stat="identity",color="dark blue",fill="yellow")

2.g

merged%>%group_by(City)%>%summarise(Spend=sum(Amount.x))%>%arrange(desc(Spend))%>%head(1)%>%select(City)
## # A tibble: 1 x 1
##   City  
##   <chr> 
## 1 COCHIN

2.h

merged%>%group_by(Age_Group=cut(Age,seq(18,88,8)))%>%summarise(Money_Spend=sum(Amount.x))%>%arrange(desc(Money_Spend))%>%head(1)
## # A tibble: 1 x 2
##   Age_Group Money_Spend
##   <fct>           <dbl>
## 1 (42,50]     53242391.

2.h Visual Form

merged%>%group_by(Age_Group=cut(Age,seq(18,88,8)))%>%summarise(Money_Spend=sum(Amount.x))%>%ggplot(aes(x=Age_Group,y=Money_Spend))+geom_bar(stat="identity",fill="dark blue")+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6))

2.i

merged%>%group_by(Customer=Customer.y)%>%summarise(Repayment=sum(Amount.y))%>%arrange(desc(Repayment))%>%head(10)
## # A tibble: 10 x 2
##    Customer Repayment
##    <chr>        <dbl>
##  1 A61      10539143.
##  2 A60       9876291.
##  3 A13       9572001.
##  4 A43       8489871.
##  5 A45       8448335.
##  6 A12       8334760.
##  7 A14       7943269.
##  8 A44       7744730.
##  9 A39       7622483.
## 10 A42       7615461.

Ques 3

merged%>%group_by(Year=year(dmy(Month.x)),City,Product)%>%summarise(Spend=sum(Amount.x))
## # A tibble: 72 x 4
## # Groups:   Year, City [24]
##     Year City      Product     Spend
##    <dbl> <chr>     <chr>       <dbl>
##  1  2004 BANGALORE Gold     9289879.
##  2  2004 BANGALORE Platinum 1112732.
##  3  2004 BANGALORE Silver   1291853.
##  4  2004 BOMBAY    Gold     6987854.
##  5  2004 BOMBAY    Platinum  897265.
##  6  2004 BOMBAY    Silver    532089.
##  7  2004 CALCUTTA  Gold     7477141.
##  8  2004 CALCUTTA  Platinum 2037690.
##  9  2004 CALCUTTA  Silver    500006 
## 10  2004 CHENNAI   Gold     1059618.
## # ... with 62 more rows

Graphical Representation

ggplotly(merged%>%group_by(year_city=paste(Year=year(dmy(Month.x)),City),Product)%>%summarise(Spend=sum(Amount.x))%>%ggplot(aes(x=year_city,y=Spend,fill=Product))+geom_bar(stat="identity",position = "dodge")+xlab("")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6)))

Ques4

4.a

merged%>%group_by(Month=months(dmy(Month.x)),City)%>%summarise(Total_Spend=sum(Amount.x))%>%ggplot(aes(x=City,y=Total_Spend,fill=Month))+geom_bar(stat="identity",position=position_dodge(width = 0.5))+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6))+theme(axis.text.x=element_text(size=7))

4.b

merged%>%group_by(Year=year(dmy(Month.x)),Type)%>%summarise(Spend=sum(Amount.x))%>%filter(Type=="AIR TICKET")%>%ggplot(aes(x=factor(Year),y=Spend,fill=Type))+geom_bar(stat="identity",width=0.32,fill="yellow",color="dark blue")+scale_y_continuous(labels=label_number(suffix = "M",scale = 1e-6))+ggtitle("Yearly spend on Air Tickets")+guides(fill=FALSE)+xlab("Year")

4.c

merged%>%group_by(Month=months(dmy(Month.x)),Product)%>%summarise(Spend=sum(Amount.x))%>%ggplot(aes(x=Product,y=Spend,fill=Month))+geom_bar(stat="identity",position ="dodge",color="white")+scale_fill_manual(values = c("#141155","#b3b023","#d62e61","#a80000","#eab676","#2c8030","#873e23","#21130d","#A99a93","#A893a9","#93a99a","#F5d40f"))+scale_y_continuous(labels=label_number(suffix="M",scale = 1e-6))+ggtitle("        Monthly Spend For Each Product")

Ques5

## # A tibble: 211 x 5
## # Groups:   Product, Month, City [84]
##    Product Month City      Customer    Repay
##    <chr>   <chr> <chr>     <chr>       <dbl>
##  1 Gold    April BANGALORE A14       812582.
##  2 Gold    April BANGALORE A43       612542.
##  3 Gold    April BANGALORE A1        508949.
##  4 Gold    April BANGALORE A13       494392.
##  5 Gold    April BANGALORE A92       459106.
##  6 Gold    April BANGALORE A30       425694.
##  7 Gold    April BOMBAY    A62      1115760.
##  8 Gold    April BOMBAY    A12       601326.
##  9 Gold    April BOMBAY    A17       391463.
## 10 Gold    April BOMBAY    A91       247781 
## # ... with 201 more rows